PCA (Principal Component Analysis)
t-SNE (t-distributed Stochastic Neighbor Embedding)
LDA (Linear Discriminant Analysis)
UMAP (Uniform Manifold Approximation and Projection)
# Install plotly; the plotting helpers in this notebook build their figures with it.
!pip install plotly
Requirement already satisfied: plotly in c:\programdata\anaconda3\lib\site-packages (5.10.0) Requirement already satisfied: tenacity>=6.2.0 in c:\programdata\anaconda3\lib\site-packages (from plotly) (8.0.1)
!pip install umap
Requirement already satisfied: umap in c:\programdata\anaconda3\lib\site-packages (0.1.1)
import numpy as np
import pandas as pd
import time
# For plotting
import plotly.io as plt_io
import plotly.graph_objects as go
%matplotlib inline
#PCA
from sklearn.decomposition import PCA
#TSNE
from sklearn.manifold import TSNE
#UMAP
import umap
#LDA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
# Load the Sign Language MNIST CSV into a DataFrame.
# NOTE: the file read here is the *test* split (sign_mnist_test.csv), even
# though the frame is named `train`.
csv_path = r'C:\Users\Hanitha.Varsini\Desktop\Tasks\temp\sign_mnist\sign_mnist_test\sign_mnist_test.csv'
train = pd.read_csv(csv_path)
# Preview the first rows (kept as the last expression so the notebook renders it).
train.head()
| | label | pixel1 | pixel2 | pixel3 | pixel4 | pixel5 | pixel6 | pixel7 | pixel8 | pixel9 | ... | pixel775 | pixel776 | pixel777 | pixel778 | pixel779 | pixel780 | pixel781 | pixel782 | pixel783 | pixel784 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 6 | 149 | 149 | 150 | 150 | 150 | 151 | 151 | 150 | 151 | ... | 138 | 148 | 127 | 89 | 82 | 96 | 106 | 112 | 120 | 107 |
| 1 | 5 | 126 | 128 | 131 | 132 | 133 | 134 | 135 | 135 | 136 | ... | 47 | 104 | 194 | 183 | 186 | 184 | 184 | 184 | 182 | 180 |
| 2 | 10 | 85 | 88 | 92 | 96 | 105 | 123 | 135 | 143 | 147 | ... | 68 | 166 | 242 | 227 | 230 | 227 | 226 | 225 | 224 | 222 |
| 3 | 0 | 203 | 205 | 207 | 206 | 207 | 209 | 210 | 209 | 210 | ... | 154 | 248 | 247 | 248 | 253 | 236 | 230 | 240 | 253 | 255 |
| 4 | 3 | 188 | 191 | 193 | 195 | 199 | 201 | 202 | 203 | 203 | ... | 26 | 40 | 64 | 48 | 29 | 46 | 49 | 46 | 46 | 53 |
5 rows × 785 columns
# Display the frame's dimensions as (rows, columns).
train.shape
(7172, 785)
# Print the number of distinct labels, then the distinct label values themselves.
print(len(train['label'].unique()))
print(train['label'].unique())
24 [ 6 5 10 0 3 21 14 7 8 12 4 22 2 15 1 13 19 18 23 17 20 16 11 24]
# Keep only the rows whose label is below 10. The label listing printed
# earlier contains no 9, so this retains nine classes (0-8), not ten.
train = train.loc[train['label'].lt(10)]
# Target vector: the label column as a NumPy array.
y = train['label'].to_numpy()
# Feature matrix: every column from 'pixel1' to the last one.
x = train.loc[:, 'pixel1':].to_numpy()
def plot_2d(component1, component2, labels=None):
    """Show an interactive 2-D scatter plot of two embedding components.

    Parameters
    ----------
    component1, component2 : array-like
        Coordinates for the horizontal and vertical axes, one value per sample.
    labels : array-like, optional
        Values used to colour the markers, one per sample. When omitted, the
        module-level label array ``y`` is used, which is what this helper has
        always done; pass it explicitly when plotting a different subset.

    The figure is rendered with ``fig.show()``; nothing is returned.
    """
    # Fall back to the notebook-global labels only when none were supplied.
    marker_colors = y if labels is None else labels
    fig = go.Figure(data=go.Scatter(
        x=component1,
        y=component2,
        mode='markers',
        marker=dict(
            size=20,
            color=marker_colors,   # colour each point by its label value
            colorscale='Rainbow',  # one of plotly's named colorscales
            showscale=True,
            line_width=1,
        ),
    ))
    fig.update_layout(margin=dict(l=100, r=100, b=100, t=100), width=800, height=500)
    fig.layout.template = 'plotly_dark'
    fig.show()
def plot_3d(component1, component2, component3, labels=None):
    """Show an interactive 3-D scatter plot of three embedding components.

    Parameters
    ----------
    component1, component2, component3 : array-like
        Coordinates for the x, y and z axes, one value per sample.
    labels : array-like, optional
        Values used to colour the markers, one per sample. When omitted, the
        module-level label array ``y`` is used, which is what this helper has
        always done; pass it explicitly when plotting a different subset.

    The figure is rendered with ``fig.show()``; nothing is returned.
    """
    # Fall back to the notebook-global labels only when none were supplied.
    marker_colors = y if labels is None else labels
    fig = go.Figure(data=[go.Scatter3d(
        x=component1,
        y=component2,
        z=component3,
        mode='markers',
        marker=dict(
            size=8,
            color=marker_colors,   # colour each point by its label value
            colorscale='Rainbow',  # one of plotly's named colorscales
            opacity=1,
            line_width=1,
        ),
    )])
    # Tight layout: small margins around a large canvas.
    fig.update_layout(margin=dict(l=50, r=50, b=50, t=50), width=1000, height=900)
    fig.layout.template = 'plotly_dark'
    fig.show()
from sklearn.preprocessing import StandardScaler

# Standardise every pixel column before projecting, so that no single
# high-variance pixel dominates the components. This overwrites `x`, and the
# later reducers in this notebook consume the standardised matrix.
x = StandardScaler().fit_transform(x)

start = time.time()
# random_state is pinned to the same seed used for t-SNE and UMAP: with the
# default svd_solver='auto', scikit-learn may select the randomized SVD for an
# input of this size, and that solver is only reproducible with a fixed seed.
pca = PCA(n_components=3, random_state=42)
principalComponents = pca.fit_transform(x)
print('Duration: {} seconds'.format(time.time() - start))

# Labelled copy of the projection, one column per principal component.
principal = pd.DataFrame(
    data=principalComponents,
    columns=['principal component 1', 'principal component 2', 'principal component 3'],
)
Duration: 0.07524347305297852 seconds
# Plot the first two principal components in 2-D, then the first three in 3-D.
# Transposing the column slice yields one 1-D array per component.
plot_2d(*principalComponents[:, :2].T)
plot_3d(*principalComponents[:, :3].T)
start = time.time()
# Reduce to 50 principal components first, then run t-SNE on that smaller
# matrix. The PCA step is seeded as well: an unseeded (possibly randomized)
# PCA would feed t-SNE a slightly different input on every run and defeat the
# fixed t-SNE seed below.
pca_50 = PCA(n_components=50, random_state=42)
pca_result_50 = pca_50.fit_transform(x)
# NOTE(review): newer scikit-learn releases rename TSNE's `n_iter` to
# `max_iter`; the pip output in this notebook shows scikit-learn 0.24.2, where
# `n_iter` is the accepted spelling - revisit when upgrading.
tsne = TSNE(
    random_state=42,
    n_components=3,
    verbose=0,
    perplexity=40,
    n_iter=400,
).fit_transform(pca_result_50)
# The reported duration covers both the PCA(50) step and the t-SNE fit.
print('Duration: {} seconds'.format(time.time() - start))
Duration: 9.095139265060425 seconds
# Plot the first two t-SNE dimensions in 2-D, then all three in 3-D.
# Transposing the column slice yields one 1-D array per dimension.
plot_2d(*tsne[:, :2].T)
plot_3d(*tsne[:, :3].T)
!pip uninstall umap
!pip install umap-learn
^C Requirement already satisfied: umap-learn in c:\programdata\anaconda3\lib\site-packages (0.3.10) Requirement already satisfied: scikit-learn>=0.16 in c:\programdata\anaconda3\lib\site-packages (from umap-learn) (0.24.2) Requirement already satisfied: scipy>=0.19 in c:\programdata\anaconda3\lib\site-packages (from umap-learn) (1.7.1) Requirement already satisfied: numba>=0.37 in c:\programdata\anaconda3\lib\site-packages (from umap-learn) (0.54.1) Requirement already satisfied: numpy>=1.13 in c:\programdata\anaconda3\lib\site-packages (from umap-learn) (1.20.3) Requirement already satisfied: llvmlite<0.38,>=0.37.0rc1 in c:\programdata\anaconda3\lib\site-packages (from numba>=0.37->umap-learn) (0.37.0) Requirement already satisfied: setuptools in c:\programdata\anaconda3\lib\site-packages (from numba>=0.37->umap-learn) (58.0.4) Requirement already satisfied: threadpoolctl>=2.0.0 in c:\programdata\anaconda3\lib\site-packages (from scikit-learn>=0.16->umap-learn) (2.2.0) Requirement already satisfied: joblib>=0.11 in c:\programdata\anaconda3\lib\site-packages (from scikit-learn>=0.16->umap-learn) (1.1.0)
# Ensure umap-learn is installed, then import the `umap` module it provides.
# NOTE(review): `umap` was already imported near the top of this notebook; if
# that import ran in the same kernel, this one is served from Python's module
# cache, so a kernel restart is presumably needed to pick up a newly installed
# umap-learn - confirm.
!pip install umap-learn
import umap
Requirement already satisfied: umap-learn in c:\programdata\anaconda3\lib\site-packages (0.3.10) Requirement already satisfied: scikit-learn>=0.16 in c:\programdata\anaconda3\lib\site-packages (from umap-learn) (0.24.2) Requirement already satisfied: scipy>=0.19 in c:\programdata\anaconda3\lib\site-packages (from umap-learn) (1.7.1) Requirement already satisfied: numba>=0.37 in c:\programdata\anaconda3\lib\site-packages (from umap-learn) (0.54.1) Requirement already satisfied: numpy>=1.13 in c:\programdata\anaconda3\lib\site-packages (from umap-learn) (1.20.3) Requirement already satisfied: setuptools in c:\programdata\anaconda3\lib\site-packages (from numba>=0.37->umap-learn) (58.0.4) Requirement already satisfied: llvmlite<0.38,>=0.37.0rc1 in c:\programdata\anaconda3\lib\site-packages (from numba>=0.37->umap-learn) (0.37.0) Requirement already satisfied: joblib>=0.11 in c:\programdata\anaconda3\lib\site-packages (from scikit-learn>=0.16->umap-learn) (1.1.0) Requirement already satisfied: threadpoolctl>=2.0.0 in c:\programdata\anaconda3\lib\site-packages (from scikit-learn>=0.16->umap-learn) (2.2.0)
# Time a seeded, 3-component UMAP embedding of the feature matrix `x`.
start = time.time()
reducer = umap.UMAP(n_components=3, random_state=42)
embedding = reducer.fit_transform(x)
print(f'Duration: {time.time() - start} seconds')
Duration: 7.023167371749878 seconds
# Install umap-learn again, this time resolving packages through the USTC
# PyPI mirror passed with -i instead of the default index.
!pip install umap-learn -i https://mirrors.ustc.edu.cn/pypi/web/simple
Looking in indexes: https://mirrors.ustc.edu.cn/pypi/web/simple Requirement already satisfied: umap-learn in c:\programdata\anaconda3\lib\site-packages (0.3.10) Requirement already satisfied: numpy>=1.13 in c:\programdata\anaconda3\lib\site-packages (from umap-learn) (1.20.3) Requirement already satisfied: scikit-learn>=0.16 in c:\programdata\anaconda3\lib\site-packages (from umap-learn) (0.24.2) Requirement already satisfied: numba>=0.37 in c:\programdata\anaconda3\lib\site-packages (from umap-learn) (0.54.1) Requirement already satisfied: scipy>=0.19 in c:\programdata\anaconda3\lib\site-packages (from umap-learn) (1.7.1) Requirement already satisfied: llvmlite<0.38,>=0.37.0rc1 in c:\programdata\anaconda3\lib\site-packages (from numba>=0.37->umap-learn) (0.37.0) Requirement already satisfied: setuptools in c:\programdata\anaconda3\lib\site-packages (from numba>=0.37->umap-learn) (58.0.4) Requirement already satisfied: threadpoolctl>=2.0.0 in c:\programdata\anaconda3\lib\site-packages (from scikit-learn>=0.16->umap-learn) (2.2.0) Requirement already satisfied: joblib>=0.11 in c:\programdata\anaconda3\lib\site-packages (from scikit-learn>=0.16->umap-learn) (1.1.0)
# Plot the fitted UMAP embedding stored on the reducer: first two dimensions
# in 2-D, then all three in 3-D. Transposing the column slice yields one 1-D
# array per dimension.
plot_2d(*reducer.embedding_[:, :2].T)
plot_3d(*reducer.embedding_[:, :3].T)
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

# Time a 3-component LDA projection. Unlike the other reducers in this
# notebook, LDA is fitted with the labels `y` as well as the features `x`.
start = time.time()
X_LDA = LDA(n_components=3).fit_transform(X=x, y=y)
print(f'Duration: {time.time() - start} seconds')
Duration: 0.409257173538208 seconds
# Plot the first two LDA dimensions in 2-D, then the first three in 3-D.
# Transposing the column slice yields one 1-D array per dimension.
plot_2d(*X_LDA[:, :2].T)
plot_3d(*X_LDA[:, :3].T)